*! version 2.0
* 21 August 2013
* NIDS
* Merging individuals across waves 1, 2 & 3

/*  

NOTE TO USER

This dataset contains individuals who are represented in Wave 1, Wave 2 and Wave 3 of NIDS. TSMs from all waves are therefore ignored, 
as are new Wave 2 & Wave 3 CSMs introduced to the NIDS sample as a result of a CSM giving birth. We also ignore individuals who are 
deceased.

The resulting dataset is a balanced dataset of respondents who were alive in W1, W2 and W3. Their individual questionnaire 
outcomes have not been controlled for.

*/

*===========================================================================================================================================
* GLOBALS FOR DATA FILES AND VERSION SUFFIXES
*
* Edit the values below to point at your own copy of the data. Every path
* and file name used later in this do file is built from these globals.

* Interpreter version for the whole do file. The original author notes that
* this is needed for the rename command used further down.
version 12.0

* Folders holding the anonymised input data, one per wave
global W1Data "\\137.158.104.21\data\Panel Public Release 2014a\Wave 1\Anon"
global W2Data "\\137.158.104.21\data\Panel Public Release 2014a\Wave 2\Anon"
global W3Data "\\137.158.104.21\data\Panel Public Release 2014a\Wave 3\Anon"

* Version suffix that is appended to each input file name, one per wave
global W1VerIN "W1_Anon_V5.2"
global W2VerIN "W2_Anon_V2.2"
global W3VerIN "W3_Anon_V1.2"

* Folder and file-name suffix for the final merged dataset
global DataOUT "C:\Users\01406074\Desktop"
global VersionOUT "merged"

* Folder that holds the intermediate working datasets. The working datasets
* written here are erased again at the end of the do file.
global temp "C:\Users\01406074\Desktop"

*===========================================================================================================================================

* OPENING AND APPENDING W3 ADULT, CHILD & PROXY
*
* Each questionnaire file is loaded, its variables are renamed so that the
* questionnaire letter after the wave number is removed (w#_a*, w#_p* and
* w#_c* all become w#*), and a string variable records which file the
* observation came from. The three files are then stacked into one dataset.

* PROXY: prepare and park as a working file
use "${W3Data}\Proxy_${W3VerIN}.dta", clear
rename w#_p* w#*
generate w3_dataset = "Proxy"
save "${temp}\proxy3.dta", replace

* ADULT: prepare and park as a working file
use "${W3Data}\Adult_${W3VerIN}.dta", clear
rename w#_a* w#*
generate w3_dataset = "Adult"
save "${temp}\adult3.dta", replace

* CHILD: prepare in memory, then stack the adult and proxy working files
* underneath it (child first, adult second, proxy last)
use "${W3Data}\Child_${W3VerIN}.dta", clear
rename w#_c* w#*
generate w3_dataset = "Child"
append using "${temp}\adult3.dta"
append using "${temp}\proxy3.dta"

* NOTE(review): the reason for removing w3_outcome is not recorded in this
* file -- confirm before relying on it.
drop w3_outcome

* Combined wave 3 individual-level working file
save "${temp}\w3_indi.dta", replace

*-------------------------------------------------------------------------------------------------------------------------------------------

* OPENING AND APPENDING W2 ADULT, CHILD & PROXY
*
* Each questionnaire file is loaded, its variables are renamed so that the
* questionnaire letter after the wave number is removed (w#_a*, w#_p* and
* w#_c* all become w#*), and a string variable records which file the
* observation came from. The three files are then stacked into one dataset.

* PROXY: prepare and park as a working file
use "${W2Data}\Proxy_${W2VerIN}.dta", clear
rename w#_p* w#*
generate w2_dataset = "Proxy"
save "${temp}\proxy2.dta", replace

* ADULT: prepare and park as a working file
use "${W2Data}\Adult_${W2VerIN}.dta", clear
rename w#_a* w#*
generate w2_dataset = "Adult"
save "${temp}\adult2.dta", replace

* CHILD: prepare in memory, then stack the adult and proxy working files
* underneath it (child first, adult second, proxy last)
use "${W2Data}\Child_${W2VerIN}.dta", clear
rename w#_c* w#*
generate w2_dataset = "Child"
append using "${temp}\adult2.dta"
append using "${temp}\proxy2.dta"

* NOTE(review): the reason for removing w2_outcome is not recorded in this
* file -- confirm before relying on it.
drop w2_outcome

* Combined wave 2 individual-level working file
save "${temp}\w2_indi.dta", replace

*-------------------------------------------------------------------------------------------------------------------------------------------

* OPENING AND APPENDING W1 ADULT, CHILD & PROXY
*
* Each questionnaire file is loaded, its variables are renamed so that the
* questionnaire letter after the wave number is removed (w#_a*, w#_p* and
* w#_c* all become w#*), and a string variable records which file the
* observation came from. The three files are then stacked into one dataset.

* PROXY: prepare and park as a working file
use "${W1Data}\Proxy_${W1VerIN}.dta", clear
rename w#_p* w#*
generate w1_dataset = "Proxy"
save "${temp}\proxy1.dta", replace

* ADULT: prepare and park as a working file
use "${W1Data}\Adult_${W1VerIN}.dta", clear
rename w#_a* w#*
generate w1_dataset = "Adult"
save "${temp}\adult1.dta", replace

* CHILD: prepare in memory, then stack the adult and proxy working files
* underneath it (child first, adult second, proxy last)
use "${W1Data}\Child_${W1VerIN}.dta", clear
rename w#_c* w#*
generate w1_dataset = "Child"
append using "${temp}\adult1.dta"
append using "${temp}\proxy1.dta"

* Combined wave 1 individual-level working file
save "${temp}\w1_indi.dta", replace

*-------------------------------------------------------------------------------------------------------------------------------------------

* MERGING THE LINK FILE WITH THE W1, W2 AND W3 INDIVIDUAL DATA
*
* The wave 3 link file is the master dataset. The three per-wave working
* files are merged onto it one at a time, each on pid plus that wave's
* household id. Every merge stores its match indicator in w#_link, where
*     1 = row only in the master (link file side)
*     2 = row only in the using file (wave data side)
*     3 = row matched in both
* Only rows that end up matched in wave 2 and wave 3 are kept, giving the
* balanced three-wave panel that is written to $DataOUT.

use "$W3Data\Link_File_$W3VerIN.dta", clear
drop csm w3_dead wave_died cluster 					 		 							// dropping variables from the link file that are not needed

* Wave 1: discard link-file rows that have no wave 1 individual record
merge 1:1 pid w1_hhid using "$temp\w1_indi.dta", gen(w1_link)							// merging the w1_indi data into the link file
drop if w1_link == 1																	// master-only rows: no wave 1 record for this pid / w1_hhid

* Wave 2: keep only rows that also have a wave 2 individual record
merge 1:1 pid w2_hhid using "$temp\w2_indi.dta", gen(w2_link)							// merging the w2_indi data into the current dataset
keep if w2_link == 3																	// drops rows without a wave 2 record and wave 2 records without a retained row

* Wave 3: keep only rows that also have a wave 3 individual record.
* No update option is used, so w3_link can only be 1, 2 or 3 and keeping
* the matches is the same as dropping the 1s and the 2s separately.
merge 1:1 pid w3_hhid using "$temp\w3_indi.dta", gen(w3_link)							// merging the w3_indi data into the current dataset
keep if w3_link == 3																	// drops rows without a wave 3 record and wave 3 records without a retained row

order  pid w1_hhid w1_ind_outcome w1_dataset w2_hhid ///
w2_ind_outcome w2_dataset w3_hhid w3_ind_outcome w3_dataset		    					// identifiers, outcomes and source flags first

sort pid  w3_hhid																		// sorting the data

* Drop the three merge indicators by name. A w*_link wildcard would also
* remove any survey variable whose name happens to start with w and end in
* _link, so the variables are listed explicitly.
drop w1_link w2_link w3_link

save "$DataOUT\W1_W2_W3_Indi_$VersionOUT.dta", replace									// saving out the created dataset

*-------------------------------------------------------------------------------------------------------------------------------------------

* ERASING THE TEMP FILES
*
* Removes every working dataset this do file wrote to the temp folder.
* The files are listed one by one rather than in a loop: a backslash placed
* directly before a local macro reference would stop the macro expanding.

* Wave 1 working files
erase "${temp}\adult1.dta"
erase "${temp}\proxy1.dta"
erase "${temp}\w1_indi.dta"

* Wave 2 working files
erase "${temp}\adult2.dta"
erase "${temp}\proxy2.dta"
erase "${temp}\w2_indi.dta"

* Wave 3 working files
erase "${temp}\adult3.dta"
erase "${temp}\proxy3.dta"
erase "${temp}\w3_indi.dta"

* end of do file

*===========================================================================================================================================

